Chapter 3 – Geocentric Models

Author

Danton Noriega-Goodwin

Published

February 15, 2024

Code
# set-up
using Plots
using Colors
using Random
using StatsBase
using StatsPlots
using DataFrames
using NamedArrays
using Distributions
using SpecialFunctions

default(
  labels=false, fontfamily="sans-serif", titlefontsize=8, labelfontsize=6,
  legendfontsize=4)

Code 3.1

pos = [ sum(rand(Uniform(-1,1), 16)) for _ in 1:1000 ];

Fig 3.2

# NOT IN BOOK
## ATTEMPT TO REPLICATE FIG 3.2
function random_walk(n_steps=4, N=1000)
  [ rand(Uniform(-1,1), n_steps) for _ in 1:N ]
end

# final position after N steps
pos_steps_4  = [ sum(_) for _ in random_walk( 4) ];
pos_steps_8  = [ sum(_) for _ in random_walk( 8) ];
pos_steps_16 = [ sum(_) for _ in random_walk(16) ];

# position after each step (path)
path_steps_16 = map( x -> cumsum(vcat(0,x)), random_walk(16,200) );

l = @layout [ grid(1,1) ; grid(1,3) ]
plots = Plots.Plot[ plot() for i in 1:4 ]
plots[1] = plot(0:16, hcat(path_steps_16...), linealpha = 0.2, 
  ylabel="position", xlabel="step number", color=:steelblue2)
plot!( plots[1], 0:16, path_steps_16[42], 
      xticks=0:4:16, linecolor=:black, linewidth=1.5)
vline!( plots[1], [4,8,16], linecolor=:black, 
      linewidth=1.5, linestyle=:dash)

plots[2:4] .= 
  density( pos_steps_4 , linewidth=2, bandwidth=0.05, ylabel="density",
          xtickfontsize=6, titlefontsize=8, title="4 steps"),
  density( pos_steps_8 , linewidth=2, bandwidth=0.05, 
          xtickfontsize=6, titlefontsize=8, title="8 steps"),
  density( pos_steps_16, linewidth=2, bandwidth=0.05, 
          xtickfontsize=6, titlefontsize=8, title="16 steps")

# add normal curve
x_values = -6:0.1:6
μ = mean(pos_steps_16)
σ = std(pos_steps_16)
y_values = pdf(Normal(μ, σ), x_values)
plot!(plots[4], x_values, y_values, linecolor=:black, linewidth=2)
plot( plots..., layout=l )

Code 3.2

function sim_weight( H , b , sd )
  U = rand(Normal(0, sd), length(H))
  W = b * H + U
  W
end
sim_weight (generic function with 1 method)

Code 3.3

H = rand(Uniform(130, 170), 200);
W = sim_weight(H, 0.5, 5);
plot(H, W, seriestype=:scatter, markerstrokecolor=:red, markeralpha=.8,
     markercolor=:white, markersize=4, markerstrokewidth=3,
     ylabel="W", xlabel="H")

Code 3.4

beta_seq = collect(0:.1:1)
11-element Vector{Float64}:
 0.0
 0.1
 0.2
 0.3
 0.4
 0.5
 0.6
 0.7
 0.8
 0.9
 1.0

Code 3.5

Random.seed!(93);
H = rand(Uniform(130, 170), 1);
W = sim_weight(H, 0.5, 10);

Code 3.6

Need to define compute_post (Code 3.12)

# reproduce `log_sum_exp`
function log_sum_exp(x)
  xmax = maximum(x)
  xsum = sum( exp.( x .- xmax ) )
  xmax + log(xsum)
end

function compute_logPrW(W, H, a, b, sigma, prior=1)
  mu = a .+ b .* H
  sum(logpdf.(Normal.(mu, sigma), W)) + log(prior)
end

function compute_post(W, H, a, b, sigma, prior=1)
  G = [[a, b[j], sigma] for j in 1:length(b)]
  # Compute probability of each
  post = [compute_logPrW(W, H, G[i][1], G[i][2], G[i][3], 1) 
          for i in 1:length(G)]
  post = exp.(post .- log_sum_exp(post))
  # SOURCE
  # https://discourse.julialang.org/t/how-to-convert-vector-of-vectors-to-matrix/72609/27
  mat = hcat(stack(G, dims=1), post)
  NamedArray(round.(mat', digits=3), (["a", "b", "sigma", "post"], 1:length(beta_seq)), ("vars","elem"))
end
compute_post (generic function with 2 methods)
# code 3.6
beta_seq = collect(0:.1:1)
post = compute_post(W,H,0,beta_seq,10)
4×11 Named Matrix{Float64}
vars ╲ elem │     1      2      3      4  …      8      9     10     11
────────────┼──────────────────────────────────────────────────────────
a           │   0.0    0.0    0.0    0.0  …    0.0    0.0    0.0    0.0
b           │   0.0    0.1    0.2    0.3       0.7    0.8    0.9    1.0
sigma       │  10.0   10.0   10.0   10.0      10.0   10.0   10.0   10.0
post        │   0.0    0.0  0.002  0.064  …  0.003    0.0    0.0    0.0

Code 3.7

# need to convert named array to vector using vec()
bar(beta_seq, vec(post["post",:]), xlabel="beta", ylabel="posterior probability")

Code 3.8

n = 101
beta_seq = collect(range(0,1,length=n))
post = vec(compute_post(W,H,0,beta_seq,10)["post", :])

plot(H, W, xlim=(130,170), ylim=(50,90), xlabel="height (cm)", 
     ylabel="weight (kg)", color=:blue, linewidth=3)

for j in 1:n
    slope = beta_seq[j]
    lwd = 5*post[j]/maximum(post)
    Plots.abline!(slope, 0, line=:solid, width=lwd, color=:black,
                  linealpha=.6) 
end

scatter!(H, W, markersize=4, markerstrokecolor=:red,
         markeralpha=.9, markercolor=:white, markerstrokewidth=4)
title!("N = 1")

Code 3.9

# append a new observation to H and W
H_new = rand(Uniform(130,170), 1)
W_new = sim_weight( H_new , 0.5 , 10 )
H = vcat(H,H_new)
W = vcat(W,W_new)
# recompute posterior
post = vec(compute_post(W,H,0,beta_seq,10)["post", :])

# initialize empty plot
plot(xlim=(130,170), ylim=(50,90), xlabel="height (cm)", ylabel="weight (kg)")

for j in 1:n
    slope = beta_seq[j]
    lwd = 5*post[j]/maximum(post)
    Plots.abline!(slope, 0, line=:solid, width=lwd, color=:black,
                  linealpha=.6) 
end

scatter!(H, W, markersize=4, markerstrokecolor=:red,
         markeralpha=.9, markercolor=:white, markerstrokewidth=4)
title!(string("N = ", length(W)))

Fig 3.3

# create an array of arrays (ragged_array)
Random.seed!(94);
n = 101
beta_seq = collect(range(0,1,length=n));
H = rand(Uniform(130, 170), 1);
W = sim_weight(H, 0.5, 10);
# initialize empty arrays
HH = []
WW = []
# add first observation to HH and WW
push!(HH, H)
push!(WW, W)
for j in 2:n
  H_new = rand(Uniform(130,170), 1)
  W_new = sim_weight( H_new , 0.5 , 10 )
  push!(HH, vcat(HH[j-1],H_new))
  push!(WW, vcat(WW[j-1],W_new))
end
# recompute posterior
PP = [ vec(compute_post(WW[i],HH[i],0,beta_seq,10)["post", :]) for i in 1:n ]; 

# initialize empty plots
plots = Plots.Plot[plot(title="N = $i") for i in 1:n]
for i in 1:n
  p = plot(xlim=(130,170), ylim=(50,90), 
           xlabel="height (cm)", ylabel="weight (kg)",
           title=string("N = ", i))
  for j in 1:n
    slope = beta_seq[j]
    lwd = 4*PP[i][j]/maximum(PP[i])
    p = Plots.abline!(p, slope, 0, line=:solid, width=lwd, color=:black,
                  linealpha=.8) 
  end
  plots[i] = scatter!(p, HH[i], WW[i], 
          markercolor=:white, markersize=4, markeralpha=.8,
          markerstrokecolor=:red, markerstrokewidth=2)
end

# plot grid
plot(plots[1], plots[2], plots[3], plots[10], plots[20], plots[89], 
    layout=(3,2), size = (800,1200))